library(ggplot2)
library(knitr)
library(dplyr)
library(org.Mm.eg.db)
library(DT)
library(GeneOverlap)
library(GO.db)
library(KEGGREST)
library(KEGG.db)
library(fgsea)
library(maftools)
library(pheatmap)
base_dir <- "/media/theron/My_Passport/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/DESeq2/refseq_genes"
PD1_vs_combo <- read.table(sprintf("%s/%s",base_dir,"PD1_vs_Combo_genes_DESeq2.txt"),header=T)
untreated_vs_combo <- read.table(sprintf("%s/%s",base_dir,"Untreated_vs_Combo_genes_DESeq2.txt"),header=T)
untreated_vs_PD1 <- read.table(sprintf("%s/%s",base_dir,"Untreated_vs_PD1_genes_DESeq2.txt"),header=T)
which_sig <- vapply(as.numeric(PD1_vs_combo$PD1_vs_Combo_pvalue),function(val){
if (is.na(val)){
return("Insig")
}
if (val <= 0.05){
return("Sig")
} else {
return("Insig")
}},character(1))
PD1_vs_combo$significance_pval <- which_sig
which_sig <- vapply(PD1_vs_combo$PD1_vs_Combo_padj,function(val){
if (is.na(val)){
return("Insig")
}
if (val <= 0.05){
return("Sig")
} else {
return("Insig")
}},character(1))
PD1_vs_combo$significance_padj <- which_sig
PD1_vs_combo_table <- PD1_vs_combo %>% dplyr::filter(!is.na(PD1_vs_Combo_pvalue) | !is.na(PD1_vs_Combo_padj))
datatable(PD1_vs_combo_table)
ggplot(PD1_vs_combo,aes(x=PD1_vs_Combo_log2FoldChange, y=-log10(PD1_vs_Combo_pvalue), color = significance_pval))+
geom_text(label=PD1_vs_combo$gene_symbol)
ggplot(PD1_vs_combo,aes(x=PD1_vs_Combo_log2FoldChange, y=-log10(PD1_vs_Combo_padj), color = significance_padj))+
geom_text(label=PD1_vs_combo$gene_symbol)
## gsea results on GO and KEGG terms
symbol_to_entrez <- as.list(org.Mm.egALIAS2EG)
PD1_vs_combo$entrez <- symbol_to_entrez[PD1_vs_combo$gene_symbol]
SYMBOLToGO <- mapIds(org.Mm.eg.db,keys=PD1_vs_combo$gene_symbol,column='GO',keytype = 'SYMBOL',multiVals = list)
GOToSYMBOL <- sapply(reverseSplit(SYMBOLToGO),unique)
GOToSYMBOL <- GOToSYMBOL[sapply(GOToSYMBOL,length)>5]
SYMBOLToKEGG <- mapIds(org.Mm.eg.db,keys=PD1_vs_combo$gene_symbol,column='PATH',keytype = 'SYMBOL',multiVals = list)
KEGGToSYMBOL <- sapply(reverseSplit(SYMBOLToKEGG),unique)
KEGGToSYMBOL <- KEGGToSYMBOL[sapply(KEGGToSYMBOL,length)>5]
diff_dat_filt <- PD1_vs_combo %>% dplyr::filter(!is.na(PD1_vs_Combo_log2FoldChange) & !is.na(PD1_vs_Combo_pvalue))
ranks <- diff_dat_filt$PD1_vs_Combo_log2FoldChange*-log10(diff_dat_filt$PD1_vs_Combo_pvalue)
names(ranks)<-diff_dat_filt$gene_symbol
ranks<-ranks[unname(!is.na(ranks))]
# ranks<-rank(ranks,ties.method = "random")
fgseaRes_GO <- fgsea(GOToSYMBOL, ranks, eps=0,scoreType="pos")
fgseaRes_GO<-cbind(sapply(c('ONTOLOGY','TERM','DEFINITION'),
function(x){mapIds(GO.db,keys=fgseaRes_GO$pathway,keytype='GOID',column=x)}),fgseaRes_GO)
fgseaRes_KEGG <- fgsea(KEGGToSYMBOL, ranks, eps=0,scoreType="pos")
fgseaRes_KEGG <- cbind(KEGG.Name=unlist(as.list(KEGGPATHID2NAME)[fgseaRes_KEGG$pathway]),
fgseaRes_KEGG)
pathways<- readRDS("/media/theron/My_Passport/LM_LMT_LUCIANE_cellranger/references/gene_sets/mouse_hallmark.rds")
pathways_list <- lapply(unique(pathways$gs_name),function(pathway){
pathway_small <- pathways %>% dplyr::filter(gs_name == pathway)
return(pathway_small$gene_symbol)
})
names(pathways_list)<-unique(pathways$gs_name)
fgseaRes_HALLMARK <- fgsea(pathways_list, ranks, eps=0)
fgseaRes_HALLMARK <- fgseaRes_HALLMARK %>% dplyr::filter(padj <= 0.05)
GO_table <- fgseaRes_GO %>% dplyr::filter(padj <= 0.05)
GO_table<-GO_table[,c("TERM","DEFINITION","padj","NES","size")]
datatable(GO_table)
KEGG_table <- fgseaRes_KEGG %>% dplyr::filter(padj <= 0.05)
KEGG_table<-KEGG_table[,c("KEGG.Name","padj","NES","size")]
datatable(KEGG_table)
datatable(fgseaRes_HALLMARK[,c(1,3,6,7)])
which_sig <- vapply(as.numeric(untreated_vs_combo$Untreated_vs_Combo_pvalue),function(val){
if (is.na(val)){
return("Insig")
}
if (val <= 0.05){
return("Sig")
} else {
return("Insig")
}},character(1))
untreated_vs_combo$significance_pval <- which_sig
which_sig <- vapply(untreated_vs_combo$Untreated_vs_Combo_padj,function(val){
if (is.na(val)){
return("Insig")
}
if (val <= 0.05){
return("Sig")
} else {
return("Insig")
}},character(1))
untreated_vs_combo$significance_padj <- which_sig
untreated_vs_combo_table <- untreated_vs_combo %>% dplyr::filter(!is.na(Untreated_vs_Combo_pvalue) | !is.na(Untreated_vs_Combo_padj))
datatable(untreated_vs_combo_table)
ggplot(untreated_vs_combo,aes(x=Untreated_vs_Combo_log2FoldChange, y=-log10(Untreated_vs_Combo_pvalue), color = significance_pval))+
geom_text(label=untreated_vs_combo$gene_symbol)
ggplot(untreated_vs_combo,aes(x=Untreated_vs_Combo_log2FoldChange, y=-log10(Untreated_vs_Combo_padj), color = significance_padj))+
geom_text(label=untreated_vs_combo$gene_symbol)
symbol_to_entrez <- as.list(org.Mm.egALIAS2EG)
untreated_vs_combo$entrez <- symbol_to_entrez[untreated_vs_combo$gene_symbol]
SYMBOLToGO <- mapIds(org.Mm.eg.db,keys=untreated_vs_combo$gene_symbol,column='GO',keytype = 'SYMBOL',multiVals = list)
GOToSYMBOL <- sapply(reverseSplit(SYMBOLToGO),unique)
GOToSYMBOL <- GOToSYMBOL[sapply(GOToSYMBOL,length)>5]
SYMBOLToKEGG <- mapIds(org.Mm.eg.db,keys=untreated_vs_combo$gene_symbol,column='PATH',keytype = 'SYMBOL',multiVals = list)
KEGGToSYMBOL <- sapply(reverseSplit(SYMBOLToKEGG),unique)
KEGGToSYMBOL <- KEGGToSYMBOL[sapply(KEGGToSYMBOL,length)>5]
diff_dat_filt <- untreated_vs_combo %>% dplyr::filter(!is.na(Untreated_vs_Combo_log2FoldChange) & !is.na(Untreated_vs_Combo_pvalue))
ranks <- diff_dat_filt$Untreated_vs_Combo_log2FoldChange*-log10(diff_dat_filt$Untreated_vs_Combo_pvalue)
names(ranks)<-diff_dat_filt$gene_symbol
ranks<-ranks[unname(!is.na(ranks))]
# ranks<-rank(ranks,ties.method = "random")
fgseaRes_GO <- fgsea(GOToSYMBOL, ranks, eps=0,scoreType="pos")
fgseaRes_GO<-cbind(sapply(c('ONTOLOGY','TERM','DEFINITION'),
function(x){mapIds(GO.db,keys=fgseaRes_GO$pathway,keytype='GOID',column=x)}),fgseaRes_GO)
fgseaRes_KEGG <- fgsea(KEGGToSYMBOL, ranks, eps=0,scoreType="pos")
fgseaRes_KEGG <- cbind(KEGG.Name=unlist(as.list(KEGGPATHID2NAME)[fgseaRes_KEGG$pathway]),
fgseaRes_KEGG)
pathways_list <- readRDS("/media/theron/My_Passport/LM_LMT_LUCIANE_cellranger/references/gene_sets/pathways_list_ensembl.rds")
fgseaRes_HALLMARK <- fgsea(pathways_list, ranks, eps=0)
fgseaRes_HALLMARK <- fgseaRes_HALLMARK %>% dplyr::filter(padj <= 0.05)
GO_table <- fgseaRes_GO %>% dplyr::filter(padj <= 0.05)
GO_table<-GO_table[,c("TERM","DEFINITION","padj","NES","size")]
datatable(GO_table)
KEGG_table <- fgseaRes_KEGG %>% dplyr::filter(padj <= 0.05)
KEGG_table<-KEGG_table[,c("KEGG.Name","padj","NES","size")]
datatable(KEGG_table)
datatable(fgseaRes_HALLMARK[,c(1,3,6,7)])
which_sig <- vapply(as.numeric(untreated_vs_PD1$Untreated_vs_PD1_pvalue),function(val){
if (is.na(val)){
return("Insig")
}
if (val <= 0.05){
return("Sig")
} else {
return("Insig")
}},character(1))
untreated_vs_PD1$significance_pval <- which_sig
which_sig <- vapply(untreated_vs_PD1$Untreated_vs_PD1_padj,function(val){
if (is.na(val)){
return("Insig")
}
if (val <= 0.05){
return("Sig")
} else {
return("Insig")
}},character(1))
untreated_vs_PD1$significance_padj <- which_sig
untreated_vs_PD1_table <- untreated_vs_PD1 %>% dplyr::filter(!is.na(Untreated_vs_PD1_pvalue) | !is.na(Untreated_vs_PD1_padj))
datatable(untreated_vs_PD1_table)
ggplot(untreated_vs_PD1,aes(x=Untreated_vs_PD1_log2FoldChange, y=-log10(Untreated_vs_PD1_pvalue), color = significance_pval))+
geom_text(label=untreated_vs_PD1$gene_symbol)
ggplot(untreated_vs_PD1,aes(x=Untreated_vs_PD1_log2FoldChange, y=-log10(Untreated_vs_PD1_padj), color = significance_padj))+
geom_text(label=untreated_vs_PD1$gene_symbol)
symbol_to_entrez <- as.list(org.Mm.egALIAS2EG)
untreated_vs_PD1$entrez <- symbol_to_entrez[untreated_vs_PD1$gene_symbol]
SYMBOLToGO <- mapIds(org.Mm.eg.db,keys=untreated_vs_PD1$gene_symbol,column='GO',keytype = 'SYMBOL',multiVals = list)
GOToSYMBOL <- sapply(reverseSplit(SYMBOLToGO),unique)
GOToSYMBOL <- GOToSYMBOL[sapply(GOToSYMBOL,length)>5]
SYMBOLToKEGG <- mapIds(org.Mm.eg.db,keys=untreated_vs_PD1$gene_symbol,column='PATH',keytype = 'SYMBOL',multiVals = list)
KEGGToSYMBOL <- sapply(reverseSplit(SYMBOLToKEGG),unique)
KEGGToSYMBOL <- KEGGToSYMBOL[sapply(KEGGToSYMBOL,length)>5]
diff_dat_filt <- untreated_vs_PD1 %>% dplyr::filter(!is.na(Untreated_vs_PD1_log2FoldChange) & !is.na(Untreated_vs_PD1_pvalue))
ranks <- diff_dat_filt$Untreated_vs_PD1_log2FoldChange*-log10(diff_dat_filt$Untreated_vs_PD1_pvalue)
names(ranks)<-diff_dat_filt$gene_symbol
ranks<-ranks[unname(!is.na(ranks))]
# ranks<-rank(ranks,ties.method = "random")
fgseaRes_GO <- fgsea(GOToSYMBOL, ranks, eps=0,scoreType="pos")
fgseaRes_GO<-cbind(sapply(c('ONTOLOGY','TERM','DEFINITION'),
function(x){mapIds(GO.db,keys=fgseaRes_GO$pathway,keytype='GOID',column=x)}),fgseaRes_GO)
fgseaRes_KEGG <- fgsea(KEGGToSYMBOL, ranks, eps=0,scoreType="pos")
fgseaRes_KEGG <- cbind(KEGG.Name=unlist(as.list(KEGGPATHID2NAME)[fgseaRes_KEGG$pathway]),
fgseaRes_KEGG)
pathways_list <- readRDS("/media/theron/My_Passport/LM_LMT_LUCIANE_cellranger/references/gene_sets/pathways_list_ensembl.rds")
fgseaRes_HALLMARK <- fgsea(pathways_list, ranks, eps=0)
fgseaRes_HALLMARK <- fgseaRes_HALLMARK %>% dplyr::filter(padj <= 0.05)
GO_table <- fgseaRes_GO %>% dplyr::filter(padj <= 0.05)
GO_table<-GO_table[,c("TERM","DEFINITION","padj","NES","size")]
datatable(GO_table)
KEGG_table <- fgseaRes_KEGG %>% dplyr::filter(padj <= 0.05)
KEGG_table<-KEGG_table[,c("KEGG.Name","padj","NES","size")]
datatable(KEGG_table)
datatable(fgseaRes_HALLMARK[,c(1,3,6,7)])
maf_file <- "/media/theron/My_Passport/Ali_data/WES Data for 4T1MIS/MB01JHU503/MB01JHU503_000_analysis/MAF/DNA_4TIMSI_haplotypecaller.maf"
ali_maf = read.maf(maf_file)
## -Reading
## -Validating
## -Silent variants: 817928
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 12.0s elapsed (23.4s cpu)
ali_gene_summ = getGeneSummary(ali_maf)
rownames(ali_gene_summ) <- ali_gene_summ$Hugo_Symbol
rownames(PD1_vs_combo) <- PD1_vs_combo$gene_symbol
rownames(untreated_vs_combo) <- untreated_vs_combo$gene_symbol
rownames(untreated_vs_PD1) <- untreated_vs_PD1$gene_symbol
ali_gene_summ <- cbind(ali_gene_summ,PD1_vs_combo[ali_gene_summ$Hugo_Symbol,c("PD1_vs_Combo_log2FoldChange","significance_pval","significance_padj")])
ali_gene_summ <- cbind(ali_gene_summ,untreated_vs_combo[ali_gene_summ$Hugo_Symbol,c("Untreated_vs_Combo_log2FoldChange","significance_pval","significance_padj")])
ali_gene_summ <- cbind(ali_gene_summ,untreated_vs_PD1[ali_gene_summ$Hugo_Symbol,c("Untreated_vs_PD1_log2FoldChange","significance_pval","significance_padj")])
cols <- colnames(ali_gene_summ)
cols[seq(15,22)]<-c("pc_significance_pval","pc_significance_padj",
"Untreated_vs_Combo_log2FoldChange","uc_significance_pval","uc_significance_padj",
"Untreated_vs_PD1_log2FoldChange","up_significance_pval","up_significance_padj")
colnames(ali_gene_summ) <- cols
datatable(ali_gene_summ)
ali_gene_summ_PD1 <- ali_gene_summ %>% dplyr::filter(pc_significance_pval=="Sig")
ggplot(ali_gene_summ_PD1,aes(y=log2(Missense_Mutation),x=PD1_vs_Combo_log2FoldChange))+
geom_text(label=ali_gene_summ_PD1$Hugo_Symbol)+labs(title="pval Significant Genes")
datatable(ali_gene_summ_PD1)
ali_gene_summ_PD1 <- ali_gene_summ %>% dplyr::filter(pc_significance_padj=="Sig")
ggplot(ali_gene_summ_PD1,aes(y=log2(Missense_Mutation),x=PD1_vs_Combo_log2FoldChange))+
geom_text(label=ali_gene_summ_PD1$Hugo_Symbol)+labs(title="padj Significant Genes")
datatable(ali_gene_summ_PD1)
ali_gene_summ_uc <- ali_gene_summ %>% dplyr::filter(uc_significance_pval=="Sig")
ggplot(ali_gene_summ_uc,aes(y=log2(Missense_Mutation),x=Untreated_vs_Combo_log2FoldChange))+
geom_text(label=ali_gene_summ_uc$Hugo_Symbol)+labs(title="pval Significant Genes")
datatable(ali_gene_summ_uc)
ali_gene_summ_uc <- ali_gene_summ %>% dplyr::filter(uc_significance_padj=="Sig")
ggplot(ali_gene_summ_uc,aes(y=log2(Missense_Mutation),x=Untreated_vs_Combo_log2FoldChange))+
geom_text(label=ali_gene_summ_uc$Hugo_Symbol)+labs(title="padj Significant Genes")
datatable(ali_gene_summ_uc)
ali_gene_summ_up <- ali_gene_summ %>% dplyr::filter(up_significance_pval=="Sig")
ggplot(ali_gene_summ_up,aes(y=log2(Missense_Mutation),x=Untreated_vs_PD1_log2FoldChange))+
geom_text(label=ali_gene_summ_up$Hugo_Symbol)+labs(title="pval Significant Genes")
datatable(ali_gene_summ_up)
ali_gene_summ_up <- ali_gene_summ %>% dplyr::filter(up_significance_padj=="Sig")
ggplot(ali_gene_summ_up,aes(y=log2(Missense_Mutation),x=Untreated_vs_PD1_log2FoldChange))+
geom_text(label=ali_gene_summ_up$Hugo_Symbol)+labs(title="padj Significant Genes")
datatable(ali_gene_summ_up)
tpm_gene_expression_file <- "/media/theron/My_Passport/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/RSEM/tables/genes/genes_tpm_all_samples_norm.txt"
tpm_gene_expression <- read.table(tpm_gene_expression_file,header=T,sep="\t")
maf_file <- "/media/theron/My_Passport/Ali_data/WES Data for 4T1MIS/MB01JHU503/MB01JHU503_000_analysis/MAF/DNA_4TIMSI_haplotypecaller.maf"
ali_maf = read.maf(maf_file)
## -Reading
## -Validating
## -Silent variants: 817928
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 8.406s elapsed (18.7s cpu)
ali_gene_summ = as.data.frame(getGeneSummary(ali_maf))
rownames(ali_gene_summ) <- ali_gene_summ$Hugo_Symbol
missense_dat <- ali_gene_summ[tpm_gene_expression$gene_id,]
missense_dat <- missense_dat[complete.cases(missense_dat),]
rownames(tpm_gene_expression) <- tpm_gene_expression$gene_id
Missense_Mutation<-missense_dat$Missense_Mutation
tpm_gene_expression <- cbind(tpm_gene_expression[rownames(missense_dat),seq(2,ncol(tpm_gene_expression))],Missense_Mutation)
tpm_gene_expression <- tpm_gene_expression[complete.cases(tpm_gene_expression),]
combo<-apply(tpm_gene_expression[,seq(6)],1,mean)
pd1<-apply(tpm_gene_expression[,seq(7,12)],1,mean)
untreated<-apply(tpm_gene_expression[,seq(12,ncol(tpm_gene_expression))],1,mean)
Missense_Mutation<-tpm_gene_expression$Missense_Mutation
average_expression<-data.frame(c(combo,pd1,untreated),
rep(Missense_Mutation,3),
c(rep("combo",length(combo)),
rep("pd1",length(pd1)),
rep("untreated",length(untreated))))
colnames(average_expression)<-c("TPM_expression","Missense_Mutation","type")
average_expression_compared<-data.frame(combo,pd1,untreated,Missense_Mutation)
ggplot(average_expression,aes(x=log2(Missense_Mutation+1),y=log2(TPM_expression+1),color=type))+geom_point(shape=1)